{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the tab-separated MSBB RNA table stored under\n",
    "# PCG_normalized/no_covar_correction into a DataFrame.\n",
    "df = pd.read_csv(\n",
    "    '/projects/leelab2/data/AD_DATA/Nicasia/processed/PCG_normalized/no_covar_correction/MSBB_RNA.tsv',\n",
    "    sep='\\t',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the tab-separated MSBB RNA table stored under\n",
    "# samples_neuropath_prenorm into a DataFrame.\n",
    "y_df = pd.read_csv(\n",
    "    '/projects/leelab2/data/AD_DATA/Nicasia/processed/samples_neuropath_prenorm/MSBB_RNA.tsv',\n",
    "    sep='\\t',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the feature matrix: one row per original column of df, one column\n",
    "# per value of its 'PCG' column.\n",
    "# 'PCG' is made the index *before* transposing. Transposing the raw frame\n",
    "# (string label column mixed with numeric columns) produces an object-dtype\n",
    "# matrix, and taking the header from iloc[0] silently relies on 'PCG' being\n",
    "# the first column. Indexing first keeps the numeric dtype and selects the\n",
    "# label column by name.\n",
    "X_df = df.set_index('PCG').T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Discard every column that contains at least one missing value.\n",
    "X_df = X_df.dropna(axis='columns', how='any')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast the row labels of X_df to int and assign them back as the index.\n",
    "# NOTE(review): presumably needed so the labels match y_df's 'sample_name'\n",
    "# index, which is used to align the two tables further down - confirm.\n",
    "sample_ids = X_df.index.astype(int)\n",
    "X_df.index = sample_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>PCG</th>\n",
       "      <th>MT-ND1</th>\n",
       "      <th>MT-ND2</th>\n",
       "      <th>MT-CO1</th>\n",
       "      <th>MT-CO2</th>\n",
       "      <th>MT-ATP8</th>\n",
       "      <th>MT-ATP6</th>\n",
       "      <th>MT-CO3</th>\n",
       "      <th>MT-ND3</th>\n",
       "      <th>MT-ND4L</th>\n",
       "      <th>MT-ND4</th>\n",
       "      <th>...</th>\n",
       "      <th>ICA1</th>\n",
       "      <th>NXPE1</th>\n",
       "      <th>GPC6</th>\n",
       "      <th>USP49</th>\n",
       "      <th>DNAL4</th>\n",
       "      <th>PPM1F</th>\n",
       "      <th>CCDC94</th>\n",
       "      <th>CLN6</th>\n",
       "      <th>ITIH2</th>\n",
       "      <th>ABCA8</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>16955</th>\n",
       "      <td>0.654623</td>\n",
       "      <td>0.594919</td>\n",
       "      <td>0.593017</td>\n",
       "      <td>0.643478</td>\n",
       "      <td>0.599661</td>\n",
       "      <td>0.607243</td>\n",
       "      <td>0.583352</td>\n",
       "      <td>0.557391</td>\n",
       "      <td>0.614474</td>\n",
       "      <td>0.580318</td>\n",
       "      <td>...</td>\n",
       "      <td>0.52822</td>\n",
       "      <td>0.358615</td>\n",
       "      <td>0.465543</td>\n",
       "      <td>0.276988</td>\n",
       "      <td>0.508196</td>\n",
       "      <td>0.42552</td>\n",
       "      <td>0.812007</td>\n",
       "      <td>0.437408</td>\n",
       "      <td>0.393262</td>\n",
       "      <td>0.782366</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16365</th>\n",
       "      <td>0.66153</td>\n",
       "      <td>0.645178</td>\n",
       "      <td>0.589168</td>\n",
       "      <td>0.663937</td>\n",
       "      <td>0.682139</td>\n",
       "      <td>0.683116</td>\n",
       "      <td>0.597832</td>\n",
       "      <td>0.66983</td>\n",
       "      <td>0.650778</td>\n",
       "      <td>0.655711</td>\n",
       "      <td>...</td>\n",
       "      <td>0.560069</td>\n",
       "      <td>0.301708</td>\n",
       "      <td>0.424021</td>\n",
       "      <td>0.513783</td>\n",
       "      <td>0.479941</td>\n",
       "      <td>0.504961</td>\n",
       "      <td>0.825038</td>\n",
       "      <td>0.67596</td>\n",
       "      <td>0.4415</td>\n",
       "      <td>0.658782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17155</th>\n",
       "      <td>0.504562</td>\n",
       "      <td>0.513928</td>\n",
       "      <td>0.472014</td>\n",
       "      <td>0.520246</td>\n",
       "      <td>0.499945</td>\n",
       "      <td>0.508193</td>\n",
       "      <td>0.450339</td>\n",
       "      <td>0.552438</td>\n",
       "      <td>0.504349</td>\n",
       "      <td>0.496753</td>\n",
       "      <td>...</td>\n",
       "      <td>0.325873</td>\n",
       "      <td>0.553411</td>\n",
       "      <td>0.337982</td>\n",
       "      <td>0.599811</td>\n",
       "      <td>0.430225</td>\n",
       "      <td>0.638091</td>\n",
       "      <td>0.911944</td>\n",
       "      <td>0.653311</td>\n",
       "      <td>0.524825</td>\n",
       "      <td>0.803247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17135</th>\n",
       "      <td>0.634323</td>\n",
       "      <td>0.61204</td>\n",
       "      <td>0.561811</td>\n",
       "      <td>0.633328</td>\n",
       "      <td>0.661446</td>\n",
       "      <td>0.64191</td>\n",
       "      <td>0.550929</td>\n",
       "      <td>0.629559</td>\n",
       "      <td>0.632791</td>\n",
       "      <td>0.61216</td>\n",
       "      <td>...</td>\n",
       "      <td>0.520202</td>\n",
       "      <td>0.192491</td>\n",
       "      <td>0.269302</td>\n",
       "      <td>0.533864</td>\n",
       "      <td>0.494833</td>\n",
       "      <td>0.526283</td>\n",
       "      <td>0.736145</td>\n",
       "      <td>0.579555</td>\n",
       "      <td>0.452142</td>\n",
       "      <td>0.629236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16625</th>\n",
       "      <td>0.678976</td>\n",
       "      <td>0.616075</td>\n",
       "      <td>0.657637</td>\n",
       "      <td>0.673101</td>\n",
       "      <td>0.590893</td>\n",
       "      <td>0.627364</td>\n",
       "      <td>0.629435</td>\n",
       "      <td>0.653531</td>\n",
       "      <td>0.641366</td>\n",
       "      <td>0.602793</td>\n",
       "      <td>...</td>\n",
       "      <td>0.468304</td>\n",
       "      <td>0.580736</td>\n",
       "      <td>0.433354</td>\n",
       "      <td>0.502173</td>\n",
       "      <td>0.487502</td>\n",
       "      <td>0.570485</td>\n",
       "      <td>0.76894</td>\n",
       "      <td>0.485467</td>\n",
       "      <td>0.478939</td>\n",
       "      <td>0.749352</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 14932 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "PCG      MT-ND1    MT-ND2    MT-CO1    MT-CO2   MT-ATP8   MT-ATP6    MT-CO3  \\\n",
       "16955  0.654623  0.594919  0.593017  0.643478  0.599661  0.607243  0.583352   \n",
       "16365   0.66153  0.645178  0.589168  0.663937  0.682139  0.683116  0.597832   \n",
       "17155  0.504562  0.513928  0.472014  0.520246  0.499945  0.508193  0.450339   \n",
       "17135  0.634323   0.61204  0.561811  0.633328  0.661446   0.64191  0.550929   \n",
       "16625  0.678976  0.616075  0.657637  0.673101  0.590893  0.627364  0.629435   \n",
       "\n",
       "PCG      MT-ND3   MT-ND4L    MT-ND4  ...      ICA1     NXPE1      GPC6  \\\n",
       "16955  0.557391  0.614474  0.580318  ...   0.52822  0.358615  0.465543   \n",
       "16365   0.66983  0.650778  0.655711  ...  0.560069  0.301708  0.424021   \n",
       "17155  0.552438  0.504349  0.496753  ...  0.325873  0.553411  0.337982   \n",
       "17135  0.629559  0.632791   0.61216  ...  0.520202  0.192491  0.269302   \n",
       "16625  0.653531  0.641366  0.602793  ...  0.468304  0.580736  0.433354   \n",
       "\n",
       "PCG       USP49     DNAL4     PPM1F    CCDC94      CLN6     ITIH2     ABCA8  \n",
       "16955  0.276988  0.508196   0.42552  0.812007  0.437408  0.393262  0.782366  \n",
       "16365  0.513783  0.479941  0.504961  0.825038   0.67596    0.4415  0.658782  \n",
       "17155  0.599811  0.430225  0.638091  0.911944  0.653311  0.524825  0.803247  \n",
       "17135  0.533864  0.494833  0.526283  0.736145  0.579555  0.452142  0.629236  \n",
       "16625  0.502173  0.487502  0.570485   0.76894  0.485467  0.478939  0.749352  \n",
       "\n",
       "[5 rows x 14932 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the feature matrix.\n",
    "X_df.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index the table by its 'sample_name' column.\n",
    "# The check keeps the cell safe to re-run: once the column has become the\n",
    "# index, calling set_index('sample_name') again raises KeyError. A frame\n",
    "# that has neither the column nor the index name still fails loudly.\n",
    "if y_df.index.name != 'sample_name':\n",
    "    y_df = y_df.set_index('sample_name')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Align the two tables on their row labels.\n",
    "# Select only the X_df labels that actually occur in y_df: .loc with a\n",
    "# list containing missing labels raises KeyError on pandas >= 1.0 (older\n",
    "# versions reindexed with NaN rows and a FutureWarning). X_df row order is\n",
    "# preserved.\n",
    "has_meta = X_df.index.isin(y_df.index)\n",
    "y_df = y_df.loc[X_df.index[has_meta]]\n",
    "# Rows without an 'AD' value are removed.\n",
    "y_df = y_df.dropna(how='any', subset=['AD'])\n",
    "# Re-select X_df with the surviving labels so both tables share the same\n",
    "# rows in the same order.\n",
    "X_df = X_df.loc[y_df.index]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label vector: the 'AD' column as a NumPy array cast to int.\n",
    "ad_column = y_df['AD']\n",
    "y = ad_column.values.astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>PCG</th>\n",
       "      <th>MT-ND1</th>\n",
       "      <th>MT-ND2</th>\n",
       "      <th>MT-CO1</th>\n",
       "      <th>MT-CO2</th>\n",
       "      <th>MT-ATP8</th>\n",
       "      <th>MT-ATP6</th>\n",
       "      <th>MT-CO3</th>\n",
       "      <th>MT-ND3</th>\n",
       "      <th>MT-ND4L</th>\n",
       "      <th>MT-ND4</th>\n",
       "      <th>...</th>\n",
       "      <th>ICA1</th>\n",
       "      <th>NXPE1</th>\n",
       "      <th>GPC6</th>\n",
       "      <th>USP49</th>\n",
       "      <th>DNAL4</th>\n",
       "      <th>PPM1F</th>\n",
       "      <th>CCDC94</th>\n",
       "      <th>CLN6</th>\n",
       "      <th>ITIH2</th>\n",
       "      <th>ABCA8</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>16955</th>\n",
       "      <td>0.654623</td>\n",
       "      <td>0.594919</td>\n",
       "      <td>0.593017</td>\n",
       "      <td>0.643478</td>\n",
       "      <td>0.599661</td>\n",
       "      <td>0.607243</td>\n",
       "      <td>0.583352</td>\n",
       "      <td>0.557391</td>\n",
       "      <td>0.614474</td>\n",
       "      <td>0.580318</td>\n",
       "      <td>...</td>\n",
       "      <td>0.52822</td>\n",
       "      <td>0.358615</td>\n",
       "      <td>0.465543</td>\n",
       "      <td>0.276988</td>\n",
       "      <td>0.508196</td>\n",
       "      <td>0.42552</td>\n",
       "      <td>0.812007</td>\n",
       "      <td>0.437408</td>\n",
       "      <td>0.393262</td>\n",
       "      <td>0.782366</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16365</th>\n",
       "      <td>0.66153</td>\n",
       "      <td>0.645178</td>\n",
       "      <td>0.589168</td>\n",
       "      <td>0.663937</td>\n",
       "      <td>0.682139</td>\n",
       "      <td>0.683116</td>\n",
       "      <td>0.597832</td>\n",
       "      <td>0.66983</td>\n",
       "      <td>0.650778</td>\n",
       "      <td>0.655711</td>\n",
       "      <td>...</td>\n",
       "      <td>0.560069</td>\n",
       "      <td>0.301708</td>\n",
       "      <td>0.424021</td>\n",
       "      <td>0.513783</td>\n",
       "      <td>0.479941</td>\n",
       "      <td>0.504961</td>\n",
       "      <td>0.825038</td>\n",
       "      <td>0.67596</td>\n",
       "      <td>0.4415</td>\n",
       "      <td>0.658782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17155</th>\n",
       "      <td>0.504562</td>\n",
       "      <td>0.513928</td>\n",
       "      <td>0.472014</td>\n",
       "      <td>0.520246</td>\n",
       "      <td>0.499945</td>\n",
       "      <td>0.508193</td>\n",
       "      <td>0.450339</td>\n",
       "      <td>0.552438</td>\n",
       "      <td>0.504349</td>\n",
       "      <td>0.496753</td>\n",
       "      <td>...</td>\n",
       "      <td>0.325873</td>\n",
       "      <td>0.553411</td>\n",
       "      <td>0.337982</td>\n",
       "      <td>0.599811</td>\n",
       "      <td>0.430225</td>\n",
       "      <td>0.638091</td>\n",
       "      <td>0.911944</td>\n",
       "      <td>0.653311</td>\n",
       "      <td>0.524825</td>\n",
       "      <td>0.803247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17135</th>\n",
       "      <td>0.634323</td>\n",
       "      <td>0.61204</td>\n",
       "      <td>0.561811</td>\n",
       "      <td>0.633328</td>\n",
       "      <td>0.661446</td>\n",
       "      <td>0.64191</td>\n",
       "      <td>0.550929</td>\n",
       "      <td>0.629559</td>\n",
       "      <td>0.632791</td>\n",
       "      <td>0.61216</td>\n",
       "      <td>...</td>\n",
       "      <td>0.520202</td>\n",
       "      <td>0.192491</td>\n",
       "      <td>0.269302</td>\n",
       "      <td>0.533864</td>\n",
       "      <td>0.494833</td>\n",
       "      <td>0.526283</td>\n",
       "      <td>0.736145</td>\n",
       "      <td>0.579555</td>\n",
       "      <td>0.452142</td>\n",
       "      <td>0.629236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16625</th>\n",
       "      <td>0.678976</td>\n",
       "      <td>0.616075</td>\n",
       "      <td>0.657637</td>\n",
       "      <td>0.673101</td>\n",
       "      <td>0.590893</td>\n",
       "      <td>0.627364</td>\n",
       "      <td>0.629435</td>\n",
       "      <td>0.653531</td>\n",
       "      <td>0.641366</td>\n",
       "      <td>0.602793</td>\n",
       "      <td>...</td>\n",
       "      <td>0.468304</td>\n",
       "      <td>0.580736</td>\n",
       "      <td>0.433354</td>\n",
       "      <td>0.502173</td>\n",
       "      <td>0.487502</td>\n",
       "      <td>0.570485</td>\n",
       "      <td>0.76894</td>\n",
       "      <td>0.485467</td>\n",
       "      <td>0.478939</td>\n",
       "      <td>0.749352</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>682</th>\n",
       "      <td>0.393416</td>\n",
       "      <td>0.36284</td>\n",
       "      <td>0.358383</td>\n",
       "      <td>0.301956</td>\n",
       "      <td>0.397932</td>\n",
       "      <td>0.39316</td>\n",
       "      <td>0.373112</td>\n",
       "      <td>0.238552</td>\n",
       "      <td>0.345279</td>\n",
       "      <td>0.375445</td>\n",
       "      <td>...</td>\n",
       "      <td>0.780526</td>\n",
       "      <td>0.616575</td>\n",
       "      <td>0.418697</td>\n",
       "      <td>0.801493</td>\n",
       "      <td>0.511302</td>\n",
       "      <td>0.203806</td>\n",
       "      <td>0.264012</td>\n",
       "      <td>0.318857</td>\n",
       "      <td>0.51828</td>\n",
       "      <td>0.43914</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13081</th>\n",
       "      <td>0.843544</td>\n",
       "      <td>0.821458</td>\n",
       "      <td>0.826257</td>\n",
       "      <td>0.810105</td>\n",
       "      <td>0.719293</td>\n",
       "      <td>0.740682</td>\n",
       "      <td>0.78514</td>\n",
       "      <td>0.801371</td>\n",
       "      <td>0.865207</td>\n",
       "      <td>0.808495</td>\n",
       "      <td>...</td>\n",
       "      <td>0.434146</td>\n",
       "      <td>0.614986</td>\n",
       "      <td>0.317003</td>\n",
       "      <td>0.807957</td>\n",
       "      <td>0.374636</td>\n",
       "      <td>0.695497</td>\n",
       "      <td>0.372087</td>\n",
       "      <td>0.585078</td>\n",
       "      <td>0.438378</td>\n",
       "      <td>0.320362</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12464</th>\n",
       "      <td>0.870595</td>\n",
       "      <td>0.872773</td>\n",
       "      <td>0.706356</td>\n",
       "      <td>0.749624</td>\n",
       "      <td>0.759739</td>\n",
       "      <td>0.79556</td>\n",
       "      <td>0.670066</td>\n",
       "      <td>0.782321</td>\n",
       "      <td>0.927681</td>\n",
       "      <td>0.831379</td>\n",
       "      <td>...</td>\n",
       "      <td>0.484279</td>\n",
       "      <td>0.64155</td>\n",
       "      <td>0.221434</td>\n",
       "      <td>0.82728</td>\n",
       "      <td>0.23247</td>\n",
       "      <td>0.625354</td>\n",
       "      <td>0.419283</td>\n",
       "      <td>0.473149</td>\n",
       "      <td>0.609029</td>\n",
       "      <td>0.23363</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>787</th>\n",
       "      <td>0.322793</td>\n",
       "      <td>0.345305</td>\n",
       "      <td>0.198892</td>\n",
       "      <td>0.251081</td>\n",
       "      <td>0.372707</td>\n",
       "      <td>0.296663</td>\n",
       "      <td>0.244595</td>\n",
       "      <td>0.163694</td>\n",
       "      <td>0.226429</td>\n",
       "      <td>0.275967</td>\n",
       "      <td>...</td>\n",
       "      <td>0.804263</td>\n",
       "      <td>0.290121</td>\n",
       "      <td>0.449655</td>\n",
       "      <td>0.653916</td>\n",
       "      <td>0.404921</td>\n",
       "      <td>0</td>\n",
       "      <td>0.622784</td>\n",
       "      <td>0.198843</td>\n",
       "      <td>0.583232</td>\n",
       "      <td>0.633323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>153</th>\n",
       "      <td>0.507381</td>\n",
       "      <td>0.492293</td>\n",
       "      <td>0.240026</td>\n",
       "      <td>0.245889</td>\n",
       "      <td>0.561824</td>\n",
       "      <td>0.464764</td>\n",
       "      <td>0.258812</td>\n",
       "      <td>0.295172</td>\n",
       "      <td>0.489611</td>\n",
       "      <td>0.48755</td>\n",
       "      <td>...</td>\n",
       "      <td>0.842878</td>\n",
       "      <td>0.485609</td>\n",
       "      <td>0.474916</td>\n",
       "      <td>0.379303</td>\n",
       "      <td>0.843962</td>\n",
       "      <td>0.768726</td>\n",
       "      <td>0.619257</td>\n",
       "      <td>0.524815</td>\n",
       "      <td>0.452575</td>\n",
       "      <td>0.36854</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>879 rows × 14932 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "PCG      MT-ND1    MT-ND2    MT-CO1    MT-CO2   MT-ATP8   MT-ATP6    MT-CO3  \\\n",
       "16955  0.654623  0.594919  0.593017  0.643478  0.599661  0.607243  0.583352   \n",
       "16365   0.66153  0.645178  0.589168  0.663937  0.682139  0.683116  0.597832   \n",
       "17155  0.504562  0.513928  0.472014  0.520246  0.499945  0.508193  0.450339   \n",
       "17135  0.634323   0.61204  0.561811  0.633328  0.661446   0.64191  0.550929   \n",
       "16625  0.678976  0.616075  0.657637  0.673101  0.590893  0.627364  0.629435   \n",
       "...         ...       ...       ...       ...       ...       ...       ...   \n",
       "682    0.393416   0.36284  0.358383  0.301956  0.397932   0.39316  0.373112   \n",
       "13081  0.843544  0.821458  0.826257  0.810105  0.719293  0.740682   0.78514   \n",
       "12464  0.870595  0.872773  0.706356  0.749624  0.759739   0.79556  0.670066   \n",
       "787    0.322793  0.345305  0.198892  0.251081  0.372707  0.296663  0.244595   \n",
       "153    0.507381  0.492293  0.240026  0.245889  0.561824  0.464764  0.258812   \n",
       "\n",
       "PCG      MT-ND3   MT-ND4L    MT-ND4  ...      ICA1     NXPE1      GPC6  \\\n",
       "16955  0.557391  0.614474  0.580318  ...   0.52822  0.358615  0.465543   \n",
       "16365   0.66983  0.650778  0.655711  ...  0.560069  0.301708  0.424021   \n",
       "17155  0.552438  0.504349  0.496753  ...  0.325873  0.553411  0.337982   \n",
       "17135  0.629559  0.632791   0.61216  ...  0.520202  0.192491  0.269302   \n",
       "16625  0.653531  0.641366  0.602793  ...  0.468304  0.580736  0.433354   \n",
       "...         ...       ...       ...  ...       ...       ...       ...   \n",
       "682    0.238552  0.345279  0.375445  ...  0.780526  0.616575  0.418697   \n",
       "13081  0.801371  0.865207  0.808495  ...  0.434146  0.614986  0.317003   \n",
       "12464  0.782321  0.927681  0.831379  ...  0.484279   0.64155  0.221434   \n",
       "787    0.163694  0.226429  0.275967  ...  0.804263  0.290121  0.449655   \n",
       "153    0.295172  0.489611   0.48755  ...  0.842878  0.485609  0.474916   \n",
       "\n",
       "PCG       USP49     DNAL4     PPM1F    CCDC94      CLN6     ITIH2     ABCA8  \n",
       "16955  0.276988  0.508196   0.42552  0.812007  0.437408  0.393262  0.782366  \n",
       "16365  0.513783  0.479941  0.504961  0.825038   0.67596    0.4415  0.658782  \n",
       "17155  0.599811  0.430225  0.638091  0.911944  0.653311  0.524825  0.803247  \n",
       "17135  0.533864  0.494833  0.526283  0.736145  0.579555  0.452142  0.629236  \n",
       "16625  0.502173  0.487502  0.570485   0.76894  0.485467  0.478939  0.749352  \n",
       "...         ...       ...       ...       ...       ...       ...       ...  \n",
       "682    0.801493  0.511302  0.203806  0.264012  0.318857   0.51828   0.43914  \n",
       "13081  0.807957  0.374636  0.695497  0.372087  0.585078  0.438378  0.320362  \n",
       "12464   0.82728   0.23247  0.625354  0.419283  0.473149  0.609029   0.23363  \n",
       "787    0.653916  0.404921         0  0.622784  0.198843  0.583232  0.633323  \n",
       "153    0.379303  0.843962  0.768726  0.619257  0.524815  0.452575   0.36854  \n",
       "\n",
       "[879 rows x 14932 columns]"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the feature matrix after alignment with y_df (pandas truncates\n",
    "# the rendering of large frames).\n",
    "X_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data.load_data() is unpacked into six values: feature/label pairs for\n",
    "# the train, validation and test splits, in that order.\n",
    "(X_train, y_train,\n",
    " X_vald, y_vald,\n",
    " X_test, y_test) = data.load_data()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
